{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This notebook produces Figures 4 and 5 from the Online Appendix."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Import Libraries and Set Project Directory"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Data Analysis\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "# Plotting\n",
    "import geopandas as gpd\n",
    "import matplotlib.pyplot as plt \n",
    "import seaborn as sns; sns.set(color_codes=True)\n",
    "import matplotlib as mpl\n",
    "from mpl_toolkits.axes_grid1 import make_axes_locatable\n",
    "from shapely.geometry import Point, Polygon, LineString\n",
    "# Unforuntaely, PyGEOS messes up the overlay function\n",
    "gpd.options.use_pygeos = False \n",
    "plt.rcParams['font.sans-serif'] = 'Times New Roman'\n",
    "plt.rcParams['font.family'] = 'sans-serif'\n",
    "\n",
    "# CHANGE THE FOLLOWING \"PROJECT_DIR\" FILEPATH TO REFLECT LOCATION OF REPLICATION FOLDER\n",
    "PROJECT_DIR = '/Users/jjares/Documents/research/Farm Subsidies/replication_materials'\n",
    "GRAPHICS_OUTDIR = f'{PROJECT_DIR}/graphics'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Read in County Shapefile"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def move_state(gdf, state, state_colname='State_FIPS', scale_x=1, scale_y=1, translate_x=0, translate_y=0):\n",
    "    state_rows = gdf.loc[gdf[state_colname] == state, 'geometry']\n",
    "    gdf.loc[gdf[state_colname] == state, 'geometry'] = (\n",
    "        state_rows.translate(xoff=translate_x, yoff=translate_y)\n",
    "                  .scale(xfact=scale_x, yfact=scale_y, origin='centroid')\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "state_fips_map = (pd.read_csv(f'{PROJECT_DIR}/data/State FIPS Codes.csv',\n",
    "                              usecols=['FIPS', 'state_abbrev'])\n",
    "                    .assign(FIPS = lambda d: d['FIPS'].astype(str).str.zfill(2).str.strip(),\n",
    "                            state_abbrev = lambda d: d['state_abbrev'].str.strip())\n",
    "                    .set_index('FIPS')\n",
    "                    .squeeze())\n",
    "counties = (gpd.read_file(f'{PROJECT_DIR}/data/tl_2017_us_county')\n",
    "               .query('STATEFP not in [\"60\", \"66\", \"69\", \"72\", \"78\"]')\n",
    "               .rename(columns={'STATEFP': 'State_FIPS', 'COUNTYFP': 'County_FIPS'})\n",
    "               .assign(State = lambda d: d['State_FIPS'].map(state_fips_map)))\n",
    "assert counties['State'].notnull().mean()\n",
    "\n",
    "states = (gpd.read_file(f'{PROJECT_DIR}/data/tl_2017_us_state')\n",
    "             .query('STATEFP not in [\"60\", \"66\", \"69\", \"72\", \"78\"]')\n",
    "             .rename(columns={'STATEFP': 'State_FIPS'}))\n",
    "\n",
    "# Clip the (uninhabited) Northwestern Islands from Honolulu County\n",
    "main_state_bounds = Polygon([(-163, 15), (-163, 24), (-154, 24), (-154, 15), (-163, 15)])\n",
    "main_state_bounds_gdf = gpd.GeoDataFrame([1], geometry=[main_state_bounds], crs=states.crs)\n",
    "replacement_hawaii = gpd.clip(states.loc[states['State_FIPS'] == '15'], main_state_bounds_gdf)\n",
    "states.loc[states['State_FIPS'] == '15'] = replacement_hawaii\n",
    "\n",
    "# Move and scale Alaska and Hawaii\n",
    "move_state(counties, '02', scale_x=0.5, scale_y=0.5, translate_x=15, translate_y=-10)\n",
    "move_state(counties, '15', scale_x=1.5, scale_y=1.5, translate_x=30, translate_y=10)\n",
    "move_state(states, '02', scale_x=0.5, scale_y=0.5, translate_x=15, translate_y=-10)\n",
    "move_state(states, '15', scale_x=1.5, scale_y=1.5, translate_x=30, translate_y=10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Merge Counts by Counties with GeoDataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "counts_by_county = (pd.read_csv(f'{PROJECT_DIR}/data/Respondent and Sampling Frame Counts by County.csv')\n",
    "                      .assign(State_FIPS = lambda d: d['State_FIPS'].astype(str).str.zfill(2),\n",
    "                              County_FIPS = lambda d: d['County_FIPS'].astype(str).str.zfill(3)))\n",
    "\n",
    "county_points = counties[['State', 'State_FIPS', 'County_FIPS', 'geometry']].merge(\n",
    "    counts_by_county,\n",
    "    on=['State_FIPS', 'County_FIPS'],\n",
    "    how='left',\n",
    "    validate='1:1'\n",
    ")\n",
    "assert county_points.shape[0] == counties.shape[0]\n",
    "county_points['geometry'] = county_points.geometry.representative_point()\n",
    "county_points[['sf_count', 'respondent_count']] = county_points[['sf_count', 'respondent_count']].fillna(0)\n",
    "counties_points = county_points[['State', 'State_FIPS', 'County_FIPS', 'sf_count', 'respondent_count', 'geometry']]\n",
    "\n",
    "# Hawaii and Alaska maps have very bad state-county overlap\n",
    "# We'll aggregate to the state level and use a state-level representative point for these\n",
    "county_points.loc[county_points['State_FIPS'] == \"02\", 'geometry'] = (states.loc[states['State_FIPS'] == '02', 'geometry']\n",
    "                                                                            .representative_point()\n",
    "                                                                            .item())\n",
    "county_points.loc[county_points['State_FIPS'] == \"15\", 'geometry'] = (states.loc[states['State_FIPS'] == '15', 'geometry']\n",
    "                                                                            .representative_point()\n",
    "                                                                            .item())\n",
    "ak_totals = county_points.query('State_FIPS == \"02\"')[['respondent_count', 'sf_count']].sum()\n",
    "hi_totals = county_points.query('State_FIPS == \"15\"')[['respondent_count', 'sf_count']].sum()\n",
    "county_points.loc[county_points['State_FIPS'] == \"02\", ['respondent_count', 'sf_count']] = ak_totals.values\n",
    "county_points.loc[county_points['State_FIPS'] == \"15\", ['respondent_count', 'sf_count']] = hi_totals.values\n",
    "\n",
    "# Scale marker sizes larger for respondent counties\n",
    "county_points['sf_marker'] = county_points['sf_count'] * (1/5)\n",
    "county_points['respondent_marker'] = county_points['respondent_count'] * 2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Create Online Appendix Figure 4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot population estimates with an accurate legend\n",
    "fig, ax = plt.subplots(1, 1, figsize=(20, 20))\n",
    "states.plot(ax=ax, edgecolor='gray', facecolor='none', alpha=0.8)\n",
    "county_points.plot(markersize=county_points['sf_marker'], ax=ax, alpha=0.7, legend=True)\n",
    "\n",
    "plt.xlim(-160, -50)\n",
    "plt.ylim(20, 60)\n",
    "ax.set_axis_off()\n",
    "\n",
    "# The legend is generated not from the data that you see, but from \n",
    "# several other layers that are plotted outside the figure (at lat=lon=0).\n",
    "# This makes it easier to populate the legend box with circles of exact \n",
    "# sizes and colors.\n",
    "\n",
    "ax.scatter([0], [0], c='blue', alpha=0, s=0,\n",
    "            label='Count', edgecolor='black')\n",
    "ax.scatter([0], [0], c='blue', alpha=1, s=0.2,\n",
    "            label='1', edgecolor='black')\n",
    "ax.scatter([0], [0], c='blue', alpha=1, s=2,\n",
    "            label='10', edgecolor='black')\n",
    "ax.scatter([0], [0], c='blue', alpha=1, s=10,\n",
    "            label='50', edgecolor='black')\n",
    "ax.scatter([0], [0], c='blue', alpha=1, s=20,\n",
    "            label='100', edgecolor='black')\n",
    "\n",
    "legend = plt.legend(scatterpoints=1, frameon=True, ncol=5,\n",
    "        labelspacing=0.6, loc='lower center', fontsize=20, \n",
    "        title_fontsize=20)\n",
    "frame = legend.get_frame()\n",
    "frame.set_facecolor('white')\n",
    "frame.set_edgecolor('white')\n",
    "\n",
    "plt.savefig(f'{GRAPHICS_OUTDIR}/Online Appendix Figure 4.pdf', bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Create Online Appendix Figure 5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot population estimates with an accurate legend\n",
    "fig, ax = plt.subplots(1, 1, figsize=(20, 20))\n",
    "states.plot(ax=ax, edgecolor='gray', facecolor='none', alpha=0.8)\n",
    "county_points.plot(markersize=county_points['respondent_marker'], ax=ax, legend=True)\n",
    "\n",
    "plt.xlim(-160, -50)\n",
    "plt.ylim(20, 60)\n",
    "ax.set_axis_off()\n",
    "\n",
    "# The legend is generated not from the data that you see, but from \n",
    "# several other layers that are plotted outside the figure (at lat=lon=0).\n",
    "# This makes it easier to populate the legend box with circles of exact \n",
    "# sizes and colors.\n",
    "\n",
    "ax.scatter([0], [0], c='blue', alpha=0, s=0,\n",
    "            label='Count', edgecolor='black')\n",
    "ax.scatter([0], [0], c='blue', alpha=1, s=2,\n",
    "            label='1', edgecolor='black')\n",
    "ax.scatter([0], [0], c='blue', alpha=1, s=4,\n",
    "            label='2', edgecolor='black')\n",
    "ax.scatter([0], [0], c='blue', alpha=1, s=8,\n",
    "            label='4', edgecolor='black')\n",
    "ax.scatter([0], [0], c='blue', alpha=1, s=16,\n",
    "            label='8', edgecolor='black')\n",
    "\n",
    "legend = plt.legend(scatterpoints=1, frameon=True, ncol=5,\n",
    "        labelspacing=0.6, loc='lower center', fontsize=20, \n",
    "        title_fontsize=20)\n",
    "frame = legend.get_frame()\n",
    "frame.set_facecolor('white')\n",
    "frame.set_edgecolor('white')\n",
    "\n",
    "plt.savefig(f'{GRAPHICS_OUTDIR}/Online Appendix Figure 5.pdf', bbox_inches='tight')\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "geo_env",
   "language": "python",
   "name": "geo_env"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
